import openpyxl
from openpyxl.utils import column_index_from_string


def extract_toutiao_article_id(url):
    """提取 toutiao.com/article/ 后的字符串"""
    if url and isinstance(url, str):
        parts = url.split("/")
        for i, part in enumerate(parts):
            if part == "article":
                return parts[i + 1] if i + 1 < len(parts) else None
    return None


def extract_toutiao_i_id(url):
    """提取 toutiao.com/i 后的字符串"""
    if url and isinstance(url, str):
        parts = url.split("/")
        for i, part in enumerate(parts):
            if part == "toutiao.com":
                return parts[i+1].split("i")[1] if i + 1 < len(parts) else None
    return None


def process_excel_files():
    # 文件路径
    # TODO 修改表格地址
    main_file = "C:/Users/qwx1425249/Desktop/data/开发者空间案例内外媒运营数据看板.xlsx"
    tz_file = "C:/Users/qwx1425249/Desktop/data/头条&知乎.xlsx"

    # 加载工作簿
    wb_main = openpyxl.load_workbook(main_file)
    ws_main = wb_main["案例发布计划"]

    wb_tz = openpyxl.load_workbook(tz_file)
    ws_tz = wb_tz["Sheet1"]

    # 列索引映射
    col_N = column_index_from_string("L")  # N 列
    col_O = column_index_from_string("M")  # O 列
    col_R = column_index_from_string("P")  # R 列
    col_S = column_index_from_string("Q")  # S 列

    col_A = column_index_from_string("A")  # A 列
    col_B = column_index_from_string("B")  # B 列
    col_C = column_index_from_string("C")  # C 列
    col_D = column_index_from_string("D")  # D 列

    # 构建 头条&知乎.xlsx A 列与 B 列的映射
    tz_a_dict = {}
    for row in ws_tz.iter_rows(min_col=col_A, max_col=col_B, values_only=True):
        url, value = row
        if url:
            key = extract_toutiao_i_id(url)
            if key:
                tz_a_dict[key] = value

    # 构建 头条&知乎.xlsx C 列与 D 列的映射
    tz_c_dict = {}
    for row in ws_tz.iter_rows(min_col=col_C, max_col=col_D, values_only=True):
        url, value = row
        if url:
            tz_c_dict[url] = value

    # 处理 N 列与 O 列
    for row in ws_main.iter_rows(min_col=col_N, max_col=col_N):
        cell = row[0]
        if not cell.value:
            continue
        n_url = cell.value
        article_id = extract_toutiao_article_id(n_url)
        if article_id in tz_a_dict:
            o_cell = ws_main.cell(row=cell.row, column=col_O)
            o_cell.value = tz_a_dict[article_id]

    # 处理 R 列与 S 列
    for row in ws_main.iter_rows(min_col=col_R, max_col=col_R):
        cell = row[0]
        if not cell.value:
            continue
        r_url = cell.value
        if r_url in tz_c_dict:
            s_cell = ws_main.cell(row=cell.row, column=col_S)
            s_cell.value = tz_c_dict[r_url]

    # 保存修改
    wb_main.save(main_file)
    print(f"✅ 已更新 {main_file} 的 O 列和 S 列数据。")


# 执行函数
if __name__ == "__main__":
    process_excel_files()